knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
# wrangling packages
library(here) # here makes a project transportable
library(janitor) # clean_names
library(readxl) # read excel, duh!
library(data.table) # magical data frames
library(magrittr) # pipes
library(stringr) # string functions
library(forcats) # factor functions
# analysis packages
library(emmeans) # the workhorse for inference
library(nlme) # gls and some lmm
library(lme4) # linear mixed models
library(lmerTest) # linear mixed model inference
library(afex) # ANOVA linear models
library(glmmTMB) # generalized linear models
library(MASS) # negative binomial and some other functions
library(car) # model checking and ANOVA
library(DHARMa) # model checking
library(mvtnorm)
# graphing packages
library(ggsci) # color palettes
library(ggpubr) # publication quality plots
library(ggforce) # better jitter
library(cowplot) # combine plots
library(knitr) # kable tables
library(kableExtra) # kable_styling tables
library(ggdendro)
library(dendextend)
library(ggiraph)
library(GGally)
# ggplot_the_model.R packages not loaded above
library(insight)
library(lazyWeave)
# use here from the here package
here <- here::here
# use clean_names from the janitor package
clean_names <- janitor::clean_names
# use transpose from data.table
transpose <- data.table::transpose
# load functions used by this text written by me
# ggplot_the_model.R needs to be in the folder "R"
# if you didn't download this and add to your R folder in your
# project, then this line will cause an error
#source_path <- here("R", "ggplot_the_model.R")
#source(source_path)
data_folder <- "data"
image_folder <- "images"
output_folder <- "output"
deg_2_rad <- function(x){
rad <- x*pi/180
return(rad)
}
# data_path <- here(data_folder, "ghost_grappler.txt")
# dt <- fread(data_path)
# bike_label = "Tumbleweed Stargazer 2022"
# bike_range = "b1:h21"
read_bike <- function(bike_label = "Breezer Radar X Pro 2022",
bike_range = "B1:I18"){
data_file <- "bikes.xlsx"
data_path <- here(data_folder, data_file)
bike_wide <- read_excel(data_path,
sheet = bike_label,
range = bike_range) %>%
data.table
# re-read with coltype = numeric
# col_type_list <- c("text", "text", rep("numeric", ncol(bike_wide)-2))
# bike_wide <- read_excel(data_path,
# sheet = bike_label,
# range = bike_range,
# col_types = col_type_list) %>%
# data.table
bike_model <- substr(bike_label, 1, nchar(bike_label) - 5)
model_year <- substr(bike_label,
nchar(bike_label) - 4,
nchar(bike_label))
bike_wide <- bike_wide[, -2]
bike <- data.table(
model = bike_model,
year = model_year,
transpose(bike_wide,
keep.names = "frame_size",
make.names = 1)
)
keep_names <- c("model","frame_size", "seat_tube_length", "top_tube_effective_length", "head_tube_length", "seat_tube_angle", "head_tube_angle", "chainstay_length", "wheelbase", "bottom_bracket_drop", "fork_offset_rake", "stack", "reach", "standover", "stem_length", "handlebar_width", "crank_length", "wheel_size", "tire_width_spec", "tire_width_max")
bike <- bike[, .SD, .SDcols = keep_names]
# constructed measures
bike[, model_size := paste(model, frame_size)]
bike[, rear_center := sqrt(chainstay_length^2 - bottom_bracket_drop^2)] # horizontal
bike[, front_center := wheelbase - rear_center] # horizontal
bike[, seat_center := stack/tan(deg_2_rad(seat_tube_angle))]
# ratios
bike[, stack_reach := stack/reach]
bike[, front_rear := front_center/rear_center]
bike[, rear_wheelbase := rear_center/wheelbase]
# decompositions
bike[, seat_v := seat_tube_length * sin(deg_2_rad(seat_tube_angle))]
bike[, seat_h := seat_tube_length * cos(deg_2_rad(seat_tube_angle))]
bike[, head_v := head_tube_length * sin(deg_2_rad(head_tube_angle))]
bike[, head_h := head_tube_length * cos(deg_2_rad(head_tube_angle))]
# landmarks
bike[, x1 := 0]
bike[, y1 := 0]
bike[, x2 := rear_center - seat_center]
bike[, y2 := stack - bottom_bracket_drop]
bike[, x3 := rear_center + reach]
bike[, y3 := stack - bottom_bracket_drop]
bike[, x4 := x3 + head_h]
bike[, y4 := y3 - head_v]
bike[, x5 := wheelbase]
bike[, y5 := 0]
bike[, x6 := stack - bottom_bracket_drop]
bike[, y6 := -bottom_bracket_drop]
bike[, x7 := x6 - seat_h]
bike[, y7 := seat_v]
# landmarks_named
bike[, rear_x := x1]
bike[, rear_y := y1]
bike[, seat_x := x2]
bike[, seat_y := y2]
bike[, head_x := x3]
bike[, head_y := y3]
bike[, crown_x := x4]
bike[, crown_y := y4]
bike[, front_x := x5]
bike[, front_y := y5]
bike[, bottom_x := x6]
bike[, bottom_y := y6]
bike[, seattube_x := x7]
bike[, seattube_y := y7]
return(bike)
}
data_path <- here(data_folder, "bike_list.txt")
bike_list <- fread(data_path)
geobike <- data.table(NULL)
for(i in 1:nrow(bike_list)){
bike_i <- read_bike(bike_label = as.character(bike_list[i, "model"]),
bike_range = as.character(bike_list[i, "data_range"]))
bike_i[, my_fit := ifelse(frame_size == c(bike_list[i, "my_fit"]), TRUE, FALSE)]
geobike <- rbind(geobike, bike_i)
}
# add Breezer small to my_fit
geobike[model == "Breezer Radar X Pro" & frame_size == "48cm (S)", my_fit := TRUE]
# add Boone 54 to my_fit
geobike[model == "Trek Boone 6" & frame_size == "54 cm", my_fit := TRUE]
# add column of shape id for plots
shape_list <- c(15,17,19,0,2)
n_shapes <- length(shape_list)
n_models <- length(unique(geobike[, model]))
n_recycles <- floor(n_models/n_shapes)
left_over <- n_models - n_recycles*n_shapes
model_2_shape_map <- c(rep(shape_list, n_recycles), shape_list[1:left_over])
geobike[, shape_id := model_2_shape_map[as.integer(as.factor(model))]]
y_cols <- c("rear_x", "rear_y",
"seat_x", "seat_y",
"head_x", "head_y",
"crown_x", "crown_y",
"front_x", "front_y",
"bottom_x", "bottom_y",
"seattube_x", "seattube_y")
# center X at bottom bracket
geobike[, rear_x := rear_x - bottom_x]
geobike[, seat_x := seat_x - bottom_x]
geobike[, head_x := head_x - bottom_x]
geobike[, crown_x := crown_x - bottom_x]
geobike[, front_x := front_x - bottom_x]
geobike[, bottom_x := bottom_x - bottom_x]
geobike[, seattube_x := seattube_x - bottom_x]
# stack + reach size
geobike[, stack_reach_size := sqrt(stack^2 + reach^2)]
geobike[, stack_reach_size := sqrt(stack * reach)]
# effective seat tube + effective top tube size
geobike[, seat_tube_effective_length :=
sqrt((seat_x - bottom_x)^2 + (seat_y - bottom_y)^2)]
geobike[, rider_size := sqrt(seat_tube_effective_length *
top_tube_effective_length)]
# upper triangle centroid size
geobike[, centroid_x := (seat_x + bottom_x + head_x)/3]
geobike[, centroid_y := (seat_y + bottom_y + head_y)/3]
geobike[, centroid_size :=
sqrt((seat_x - centroid_x)^2 +
(seat_y - centroid_y)^2 +
(bottom_x - centroid_x)^2 +
(bottom_y - centroid_y)^2 +
(head_x - head_x)^2 +
(head_y - head_y)^2)]
ggscatter(data = geobike,
x = "rider_size",
y = "centroid_size")
gghistogram(data = geobike,
x = "rider_size")
gghistogram(data = geobike,
x = "reach")
size <- "stack_reach_size"
geobike[, rear_xs := rear_x/get(size)]
geobike[, rear_ys := rear_y/get(size)]
geobike[, seat_xs := seat_x/get(size)]
geobike[, seat_ys := seat_y/get(size)]
geobike[, head_xs := head_x/get(size)]
geobike[, head_ys := head_y/get(size)]
geobike[, crown_xs := crown_x/get(size)]
geobike[, crown_ys := crown_y/get(size)]
geobike[, front_xs := front_x/get(size)]
geobike[, front_ys := front_y/get(size)]
geobike[, bottom_xs := bottom_x/get(size)]
geobike[, bottom_ys := bottom_y/get(size)]
geobike[, seattube_xs := seattube_x/get(size)]
geobike[, seattube_ys := seattube_y/get(size)]
y_cols <- c("stack_reach", "front_rear", "bottom_bracket_drop", "fork_offset_rake", "head_tube_angle")
ggpairs(geobike[, .SD, .SDcols = y_cols])
my_fit <- geobike[my_fit == TRUE,]
shape_map <- setNames(geobike$shape_id, geobike$model)
nudge_percent <- 0.01
Notes
gg1 <- ggplot(data = geobike,
aes(x = stack,
y = reach,
color = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size,
shape = model),
show.legend = FALSE) +
scale_shape_manual(values = shape_map)
nudge_pos <- nudge_percent*(max(my_fit$stack) - min(my_fit$stack))
gg2 <- ggplot(data = my_fit,
aes(x = stack,
y = reach,
color = model,
label = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size),
show.legend = FALSE) +
geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
Notes
gg1 <- ggplot(data = geobike,
aes(x = front_center,
y = rear_center,
color = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size,
shape = model),
show.legend = FALSE) +
scale_shape_manual(values = shape_map)
nudge_pos <- nudge_percent * (max(my_fit$front_center) -
min(my_fit$front_center))
gg2 <- ggplot(data = my_fit,
aes(x = front_center,
y = rear_center,
color = model,
label = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size),
show.legend = FALSE) +
geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
gg1 <- ggplot(data = geobike,
aes(x = reach,
y = rear_wheelbase,
color = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size,
shape = model),
show.legend = FALSE) +
scale_shape_manual(values = shape_map)
nudge_pos <- nudge_percent * (max(my_fit$reach) -
min(my_fit$reach))
gg2 <- ggplot(data = my_fit,
aes(x = reach,
y = rear_wheelbase,
color = model,
label = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size),
show.legend = FALSE) +
geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
Notes
gg1 <- ggplot(data = geobike,
aes(x = reach,
y = seat_tube_angle,
color = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size,
shape = model),
show.legend = FALSE) +
scale_shape_manual(values = shape_map)
nudge_pos <- nudge_percent * (max(my_fit$reach) -
min(my_fit$reach))
gg2 <- ggplot(data = my_fit,
aes(x = reach,
y = seat_tube_angle,
color = model,
label = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size),
show.legend = FALSE) +
geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
gg1 <- ggplot(data = geobike,
aes(x = rear_wheelbase,
y = seat_tube_angle,
color = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size,
shape = model),
show.legend = FALSE) +
scale_shape_manual(values = shape_map)
nudge_pos <- nudge_percent * (max(my_fit$rear_wheelbase) -
min(my_fit$rear_wheelbase))
gg2 <- ggplot(data = my_fit,
aes(x = rear_wheelbase,
y = seat_tube_angle,
color = model,
label = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size),
show.legend = FALSE) +
geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
Notes
gg1 <- ggplot(data = geobike,
aes(x = head_tube_angle,
y = seat_tube_angle,
color = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size,
shape = model),
show.legend = FALSE) +
scale_shape_manual(values = shape_map)
nudge_pos <- nudge_percent * (max(my_fit$head_tube_angle) -
min(my_fit$head_tube_angle))
gg2 <- ggplot(data = my_fit,
aes(x = head_tube_angle,
y = seat_tube_angle,
color = model,
label = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size),
show.legend = FALSE) +
geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
gg1 <- ggplot(data = geobike,
aes(x = head_tube_angle,
y = fork_offset_rake,
color = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size,
shape = model),
show.legend = FALSE) +
scale_shape_manual(values = shape_map)
nudge_pos <- nudge_percent * (max(my_fit$head_tube_angle) -
min(my_fit$head_tube_angle))
gg2 <- ggplot(data = my_fit,
aes(x = head_tube_angle,
y = fork_offset_rake,
color = model,
label = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size),
show.legend = FALSE) +
geom_text(hjust = 0, nudge_x = nudge_pos, size = 2, show.legend = FALSE)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
y_cols <- c("rear_xs",
"seat_xs", "seat_ys",
"head_xs", "head_ys",
"crown_xs", "crown_ys",
"front_xs",
"bottom_ys")
y_cols <- c("rear_x",
"seat_x", "seat_y",
"head_x", "head_y",
"crown_x", "crown_y",
"front_x",
"bottom_y")
X <- geobike[, .SD, .SDcols = y_cols] %>%
as.matrix()
S <- cov(X)
geo_eigen <- eigen(S)
L <- geo_eigen$values
E <- geo_eigen$vector
PC <- X %*% E
geobike[, pc1 := PC[, 1]]
geobike[, pc2 := PC[, 2]]
geobike[, pc3 := PC[, 3]]
round(cor(geobike[, .SD, .SDcols = c("pc1", "pc2", "pc3", y_cols)])[, 1:3], 2)
## pc1 pc2 pc3
## pc1 1.00 0.00 0.00
## pc2 0.00 1.00 0.00
## pc3 0.00 0.00 1.00
## rear_x -0.99 0.14 -0.03
## seat_x -0.98 0.11 0.15
## seat_y 0.99 -0.14 0.03
## head_x -0.88 -0.45 -0.02
## head_y 0.99 -0.14 0.03
## crown_x -0.69 -0.69 -0.20
## crown_y 0.58 0.12 0.80
## front_x -0.46 -0.78 0.39
## bottom_y 0.25 0.34 -0.11
gg1 <- ggplot(data = geobike,
aes(x = pc1,
y = pc2,
color = model,
shape = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size), show.legend = FALSE) +
scale_shape_manual(values = shape_map)
gg2 <- ggplot(data = geobike,
aes(x = pc2,
y = pc3,
color = model,
shape = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size), show.legend = FALSE) +
scale_shape_manual(values = shape_map)
gg3 <- ggplot(data = geobike,
aes(x = pc1,
y = pc3,
color = model,
shape = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size), show.legend = FALSE) +
scale_shape_manual(values = shape_map)
girafe(ggobj = gg1)
girafe(ggobj = gg2)
girafe(ggobj = gg3)
y_cols <- c("stack_reach", "rear_wheelbase", "bottom_bracket_drop", "fork_offset_rake", "head_tube_angle", "seat_tube_angle")
X <- geobike[, .SD, .SDcols = y_cols] %>%
scale()
S <- cov(X)
geo_eigen <- eigen(S)
L <- geo_eigen$values
E <- geo_eigen$vector
PC <- X %*% E
geobike[, pc1 := PC[, 1]]
geobike[, pc2 := PC[, 2]]
geobike[, pc3 := PC[, 3]]
round(cor(geobike[, .SD, .SDcols = c("pc1", "pc2", "pc3", y_cols)])[, 1:3], 2)
## pc1 pc2 pc3
## pc1 1.00 0.00 0.00
## pc2 0.00 1.00 0.00
## pc3 0.00 0.00 1.00
## stack_reach -0.50 0.56 -0.44
## rear_wheelbase 0.45 0.05 -0.84
## bottom_bracket_drop 0.07 -0.63 0.07
## fork_offset_rake -0.79 -0.10 -0.27
## head_tube_angle 0.76 0.44 -0.01
## seat_tube_angle 0.14 -0.79 -0.39
gg1 <- ggplot(data = geobike,
aes(x = pc1,
y = pc2,
color = model,
shape = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size),
show.legend = FALSE) +
scale_shape_manual(values = shape_map) +
xlab("PC1: Offset - HTA") +
ylab("PC2: STA - StackReach + BBD")
gg2 <- ggplot(data = geobike,
aes(x = pc2,
y = pc3,
color = model,
shape = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size),
show.legend = FALSE) +
scale_shape_manual(values = shape_map) +
xlab("PC2: STA - Stack_Reach + BBD") +
ylab("PC3: Rear_Wheelbase")
gg3 <- ggplot(data = geobike,
aes(x = pc1,
y = pc3,
color = model,
shape = model)) +
geom_point_interactive(aes(tooltip = model_size,
data_id = model_size),
show.legend = FALSE) +
scale_shape_manual(values = shape_map) +
xlab("PC1: Offset - HTA") +
ylab("PC3: Rear_Wheelbase")
girafe(ggobj = gg1)
girafe(ggobj = gg2)
girafe(ggobj = gg3)
treed <- function(geobike_subset,
y_cols,
scale_it = TRUE,
center_it = TRUE
){
dd <- dist(scale(geobike_subset[, .SD, .SDcols = y_cols],
center = center_it,
scale = scale_it),
method = "euclidean")
dendro <- hclust(dd, method = "ward.D2") %>%
as.dendrogram() %>%
place_labels(paste(geobike_subset[, model],
geobike_subset[, frame_size]))
gg <- ggdendrogram(dendro)
return(gg)
}
y_cols <- c("rear_x",
"seat_x", "seat_y",
"head_x", "head_y",
"crown_x", "crown_y",
"front_x",
"bottom_y",
"seattube_x", "seattube_y")
geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- FALSE
center_it <- FALSE
gg <- treed(geobike_subset,
y_cols,
scale_it,
center_it)
gg
y_cols <- c("rear_x",
"seat_x", "seat_y",
"head_x", "head_y",
"crown_x", "crown_y",
"front_x")
geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- FALSE
center_it <- FALSE
gg <- treed(geobike_subset,
y_cols,
scale_it,
center_it)
gg
y_cols <- c("stack", "reach", "front_center", "rear_center", "bottom_bracket_drop", "fork_offset_rake", "head_tube_angle", "seat_tube_angle", "seat_tube_length")
geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE
gg <- treed(geobike_subset,
y_cols,
scale_it,
center_it)
gg
Short seat tube length designed for a dropper post
y_cols <- c("stack", "reach", "front_center", "rear_center", "bottom_bracket_drop", "fork_offset_rake", "head_tube_angle", "seat_tube_angle")
geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE
gg <- treed(geobike_subset,
y_cols,
scale_it,
center_it)
gg
Canyon and Chamois Hager are wacky fork offsets
From left to right
y_cols <- c("stack", "reach", "front_center", "rear_center", "bottom_bracket_drop", "head_tube_angle", "seat_tube_angle")
geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE
gg <- treed(geobike_subset,
y_cols,
scale_it,
center_it)
gg
y_cols <- c("stack_reach", "rear_wheelbase", "bottom_bracket_drop", "fork_offset_rake", "head_tube_angle", "seat_tube_angle", "seat_tube_length")
geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE
gg <- treed(geobike_subset,
y_cols,
scale_it,
center_it)
gg
y_cols <- c("stack_reach", "rear_wheelbase", "bottom_bracket_drop", "fork_offset_rake", "head_tube_angle", "seat_tube_angle")
geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE
gg <- treed(geobike_subset,
y_cols,
scale_it,
center_it)
gg
From left to right
y_cols <- c("stack_reach", "rear_wheelbase", "bottom_bracket_drop", "head_tube_angle", "seat_tube_angle")
geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE
gg <- treed(geobike_subset,
y_cols,
scale_it,
center_it)
gg
y_cols <- c("stack_reach", "rear_wheelbase")
geobike_subset <- geobike[my_fit == TRUE,]
scale_it <- TRUE
center_it <- TRUE
gg <- treed(geobike_subset,
y_cols,
scale_it,
center_it)
gg